source('../settings/settings.R')
source('commonFunctions.R')
drive1 <- read.csv('../data/processed/analysis/TT1_Drive_1_PP.csv')
drive2 <- read.csv('../data/processed/Analysis/TT1_Drive_2_PP.csv')
drive3 <- read.csv('../data/processed/Analysis/TT1_Drive_3_PP.csv')
drive4 <- read.csv('../data/processed/Analysis/TT1_Drive_4_PP.csv', stringsAsFactors = T)
set.seed(43)
combinedDf <- cbind(drive4, 
                    drive1$MeanPP_Seg0, 
                    drive2$MeanPP, drive3$MeanPP,
                    drive2$MeanPP_SegMax, drive3$MeanPP_SegMax, 
                    drive2$MeanPP_Seg0, drive3$MeanPP_Seg0,
                    drive2$StdPP, drive3$StdPP 
                  )
names(combinedDf) <- c(names(drive4), 
                       "PP_Dev_1_Turning",
                       "PP_Dev_2", "PP_Dev_3", 
                       "PP_Dev_2_Straight", "PP_Dev_3_Straight", 
                       "PP_Dev_2_Turning", "PP_Dev_3_Turning", 
                       "Std_PP_2", "Std_PP_3")

combinedDf$Subject <- paste0("#", str_pad(combinedDf$Subject, 2, pad="0"))
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO",]
combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C",]
combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M",]

combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
COLOR_NORMAL <- list(color='rgb(120,120,120)')
COLOR_COGNITIVE <- list(color='rgb(158,202,225)')
COLOR_MOTORIC <- list(color='rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color='rgb(158,202,225)')
COLOR_FAILURE <- list(color='red')

yAxis <- list(
  title = 'Perinasal Perspiration (Log)',
  range=c(-0.3, 0.5)
)

# Apply Otsu algorithm to select threshold
ppDev <- combinedDf$PP_Dev
ppDevArray <- matrix(ppDev ,nrow = 1,ncol = length(ppDev))
  
THRESHOLD_MILD = otsu(ppDevArray, range=c(min(ppDev), max(ppDev))) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))
[1] "Threshold: 0.062365390625"
MARKER_LINE_MILD = list(color="blue")
MARKER_LINE_EXTREME = list(color="red")
fig_NoStressor <- plot_ly(combinedDf_NoStressor, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Failure - PP Deviation', marker=COLOR_FAILURE) %>% 
  add_segments(x="#01", xend="#41", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold: Mild Change of PP",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="No Stressor")

htmltools::tagList(fig_NoStressor)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
fig_Cognitive <- plot_ly(combinedDf_Cognitive, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Failure - PP Deviation', marker=COLOR_FAILURE) %>% 
  add_segments(x="#02", xend="#22", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold: Mild Change of PP",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#02", xend="#22", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="Stressor = Cognitive")

htmltools::tagList(fig_Cognitive)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
fig_Motoric <- plot_ly(combinedDf_Motoric, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Arousal in Drive C - Straight segment', marker=COLOR_COGNITIVE, width=870) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Arousal in Drive M - Straight segment', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Arousal in Drive C - Turning segment', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Arousal in Drive M - Turning segment', marker=COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Prior, name = 'Arousal in Drive F - Under prior stressor', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Arousal in Drive F - Unintended acceleration', marker=COLOR_FAILURE) %>% 
  add_segments(x="#05", xend="#31", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#05", xend="#31", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="Stressor = Motoric")

htmltools::tagList(fig_Motoric)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
library(nlme)
combinedDf$Subject = as.factor(combinedDf$Subject)
combinedDf$Activity = as.factor(combinedDf$Activity)
combinedDf$PP_Dev_Group = ifelse(combinedDf$PP_Dev > THRESHOLD_MILD, 1, 0)

Extract data for important features

importantFeaturesDf <- combinedDf %>% select(Subject, Std_PP_3, PP_Dev_2_Turning, Activity, PP_Dev, PP_Dev_Group)
linearModel1 <- lm(PP_Dev ~ 
              + abs(PP_Dev_2_Straight)
              + abs(PP_Dev_3_Straight)
              + abs(PP_Dev_2_Turning) 
              + abs(PP_Dev_3_Turning)
              # + Std_PP_2
              + Std_PP_3
              + abs(PP_Prior)
              + factor(Activity), 
            data=combinedDf)

# anova(model)
summary(linearModel1)

Call:
lm(formula = PP_Dev ~ +abs(PP_Dev_2_Straight) + abs(PP_Dev_3_Straight) + 
    abs(PP_Dev_2_Turning) + abs(PP_Dev_3_Turning) + Std_PP_3 + 
    abs(PP_Prior) + factor(Activity), data = combinedDf)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.07749 -0.02637 -0.01358  0.03307  0.07623 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)            -0.02798    0.05752  -0.486 0.635443    
abs(PP_Dev_2_Straight) -0.95922    0.29384  -3.264 0.006772 ** 
abs(PP_Dev_3_Straight) -0.79025    0.31249  -2.529 0.026476 *  
abs(PP_Dev_2_Turning)   1.46882    0.33992   4.321 0.000994 ***
abs(PP_Dev_3_Turning)   0.86005    0.33237   2.588 0.023761 *  
Std_PP_3               -0.72967    0.54136  -1.348 0.202602    
abs(PP_Prior)           0.65601    0.25341   2.589 0.023715 *  
factor(Activity)M       0.08515    0.03785   2.250 0.044008 *  
factor(Activity)NO     -0.11627    0.03973  -2.927 0.012681 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05538 on 12 degrees of freedom
Multiple R-squared:  0.7604,    Adjusted R-squared:  0.6007 
F-statistic:  4.76 on 8 and 12 DF,  p-value: 0.008009
plot(linearModel1)

linearModel2 <- lme(PP_Dev ~ 
              abs(PP_Dev_2_Straight)
              + abs(PP_Dev_3_Straight)
              + abs(PP_Dev_2_Turning) 
              + abs(PP_Dev_3_Turning)
              + Std_PP_3
              + abs(PP_Prior)
              + factor(Activity), 
            random=~1|Subject,
            data=combinedDf,
            method="REML")

# anova(linearModel2)
summary(linearModel2)
Linear mixed-effects model fit by REML
 Data: combinedDf 

Random effects:
 Formula: ~1 | Subject
        (Intercept)   Residual
StdDev:  0.05185807 0.01944678

Fixed effects: PP_Dev ~ abs(PP_Dev_2_Straight) + abs(PP_Dev_3_Straight) + abs(PP_Dev_2_Turning) +      abs(PP_Dev_3_Turning) + Std_PP_3 + abs(PP_Prior) + factor(Activity) 
 Correlation: 
                       (Intr) a(PP_D_2_S a(PP_D_3_S a(PP_D_2_T a(PP_D_3_T S_PP_3 a(PP_P fc(A)M
abs(PP_Dev_2_Straight)  0.360                                                                 
abs(PP_Dev_3_Straight)  0.359  0.194                                                          
abs(PP_Dev_2_Turning)  -0.224 -0.888     -0.390                                               
abs(PP_Dev_3_Turning)  -0.659 -0.424     -0.777      0.405                                    
Std_PP_3               -0.599  0.348     -0.243     -0.443      0.279                         
abs(PP_Prior)          -0.212 -0.678     -0.309      0.685      0.238     -0.350              
factor(Activity)M      -0.006 -0.289      0.354      0.136     -0.153     -0.452  0.139       
factor(Activity)NO     -0.134  0.168      0.529     -0.413     -0.341      0.141 -0.285  0.454

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-0.49125967 -0.16717479 -0.08612616  0.20962762  0.48331197 

Number of Observations: 21
Number of Groups: 21 
plot(linearModel2)

Linear Model from Drive C

linearModelC <- lm(PP_Dev ~
              abs(PP_Dev_2)
              + Std_PP_2
              + factor(Activity),
            data=combinedDf)

# anova(model)
summary(linearModelC)

Call:
lm(formula = PP_Dev ~ abs(PP_Dev_2) + Std_PP_2 + factor(Activity), 
    data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.099706 -0.066065  0.001924  0.059177  0.111504 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)  
(Intercept)         0.007377   0.045086   0.164   0.8721  
abs(PP_Dev_2)       0.162388   0.132768   1.223   0.2390  
Std_PP_2            0.455822   0.351120   1.298   0.2126  
factor(Activity)M   0.077066   0.041038   1.878   0.0787 .
factor(Activity)NO -0.034949   0.041624  -0.840   0.4135  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0739 on 16 degrees of freedom
Multiple R-squared:  0.4313,    Adjusted R-squared:  0.2891 
F-statistic: 3.033 on 4 and 16 DF,  p-value: 0.04872
plot(linearModelC)

linearModelC_Segments <- lm(PP_Dev ~ 
                abs(PP_Dev_2_Straight)
              + abs(PP_Dev_2_Turning)
              + Std_PP_2,
            data=combinedDf)

# anova(model)
summary(linearModelC_Segments)

Call:
lm(formula = PP_Dev ~ abs(PP_Dev_2_Straight) + abs(PP_Dev_2_Turning) + 
    Std_PP_2, data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.138763 -0.051165  0.000299  0.063907  0.133769 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)
(Intercept)             0.009328   0.049145   0.190    0.852
abs(PP_Dev_2_Straight) -0.179484   0.252076  -0.712    0.486
abs(PP_Dev_2_Turning)   0.414303   0.309402   1.339    0.198
Std_PP_2                0.320646   0.411684   0.779    0.447

Residual standard error: 0.08508 on 17 degrees of freedom
Multiple R-squared:  0.1989,    Adjusted R-squared:  0.05756 
F-statistic: 1.407 on 3 and 17 DF,  p-value: 0.2751
plot(linearModelC_Segments)

Linear Model from Drive M

combinedDfNoOutlier <- combinedDf[combinedDf$Subject != "#05",]
linearModelM <- lm(PP_Dev ~ 
              PP_Dev_3
              + abs(PP_Dev_2_Turning)
              + Std_PP_3,
            data=combinedDfNoOutlier)

# anova(model)
summary(linearModelM)

Call:
lm(formula = PP_Dev ~ PP_Dev_3 + abs(PP_Dev_2_Turning) + Std_PP_3, 
    data = combinedDfNoOutlier)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.118324 -0.049364 -0.006116  0.042864  0.103321 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)  
(Intercept)           -0.14530    0.07196  -2.019   0.0605 .
PP_Dev_3              -0.30294    0.19106  -1.586   0.1324  
abs(PP_Dev_2_Turning)  0.20037    0.15486   1.294   0.2141  
Std_PP_3               2.44903    0.85446   2.866   0.0112 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07102 on 16 degrees of freedom
Multiple R-squared:  0.4746,    Adjusted R-squared:  0.3761 
F-statistic: 4.818 on 3 and 16 DF,  p-value: 0.01413
plot(linearModelM)

# Export the anova table
library(xtable)
lmCoeffs <- summary(linearModel1)$coefficients
lmAnova <- anova(linearModel1)

print(xtable(lmCoeffs, digits=c(0,5,5,5,5)))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Thu Jul  9 17:24:21 2020
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
  \hline
(Intercept) & -0.02798 & 0.05752 & -0.48640 & 0.63544 \\ 
  abs(PP\_Dev\_2\_Straight) & -0.95922 & 0.29384 & -3.26449 & 0.00677 \\ 
  abs(PP\_Dev\_3\_Straight) & -0.79025 & 0.31249 & -2.52884 & 0.02648 \\ 
  abs(PP\_Dev\_2\_Turning) & 1.46882 & 0.33992 & 4.32105 & 0.00099 \\ 
  abs(PP\_Dev\_3\_Turning) & 0.86005 & 0.33237 & 2.58763 & 0.02376 \\ 
  Std\_PP\_3 & -0.72967 & 0.54136 & -1.34785 & 0.20260 \\ 
  abs(PP\_Prior) & 0.65601 & 0.25341 & 2.58868 & 0.02372 \\ 
  factor(Activity)M & 0.08515 & 0.03785 & 2.24987 & 0.04401 \\ 
  factor(Activity)NO & -0.11627 & 0.03973 & -2.92673 & 0.01268 \\ 
   \hline
\end{tabular}
\end{table}
print(xtable(lmAnova), digits=c(0,5,5,5,5))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Thu Jul  9 17:24:21 2020
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrr}
  \hline
 & Df & Sum Sq & Mean Sq & F value & Pr($>$F) \\ 
  \hline
abs(PP\_Dev\_2\_Straight) & 1 & 0.01 & 0.01 & 3.23 & 0.0973 \\ 
  abs(PP\_Dev\_3\_Straight) & 1 & 0.00 & 0.00 & 0.30 & 0.5922 \\ 
  abs(PP\_Dev\_2\_Turning) & 1 & 0.02 & 0.02 & 5.33 & 0.0395 \\ 
  abs(PP\_Dev\_3\_Turning) & 1 & 0.00 & 0.00 & 0.63 & 0.4412 \\ 
  Std\_PP\_3 & 1 & 0.01 & 0.01 & 3.53 & 0.0847 \\ 
  abs(PP\_Prior) & 1 & 0.00 & 0.00 & 0.36 & 0.5585 \\ 
  factor(Activity) & 2 & 0.08 & 0.04 & 12.34 & 0.0012 \\ 
  Residuals & 12 & 0.04 & 0.00 &  &  \\ 
   \hline
\end{tabular}
\end{table}
combinedDf$PP_Dev <- NULL

combinedDf$Subject <- NULL
combinedDf$Activity_NO <- ifelse(combinedDf$Activity == "NO", 1, 0)
combinedDf$Activity_C <- ifelse(combinedDf$Activity == "C", 1, 0)
combinedDf$Activity_M <- ifelse(combinedDf$Activity == "M", 1, 0)
combinedDf$Activity <- NULL
combinedDf$PP_Dev_1_Turning <- NULL

# According to Linear model
combinedDf$PP_Dev_3_Straight <- abs(combinedDf$PP_Dev_3_Straight)
combinedDf$PP_Dev_2_Turning <- abs(combinedDf$PP_Dev_2_Turning)
combinedDf$PP_Dev_3_Turning <- abs(combinedDf$PP_Dev_3_Turning)
combinedDf$PP_Prior <- abs(combinedDf$PP_Prior) # NULL

combinedDf$Class <- ifelse(combinedDf$PP_Dev_Group == 1, T, F)
combinedDf$PP_Dev_Group <- NULL

print(names(combinedDf))
 [1] "PP_Prior"          "PP_Dev_2"          "PP_Dev_3"          "PP_Dev_2_Straight" "PP_Dev_3_Straight" "PP_Dev_2_Turning" 
 [7] "PP_Dev_3_Turning"  "Std_PP_2"          "Std_PP_3"          "Activity_NO"       "Activity_C"        "Activity_M"       
[13] "Class"            
# library(mefa)
# combinedDf <- rep(combinedDf, 10) 
set.seed(39)
n_folds <- 3
params <- param <- list(objective       = "binary:logistic", 
               booster          = "gbtree",
               eval_metric      = "auc",
               eta              = 0.1,
               max_depth        = 10,
               alpha            = 1,
               lambda           = 0,
               gamma            = 0.45,
               min_child_weight = 0.3,
               subsample        = 1,
               colsample_bytree = 1)
           
# XGBoost Model         
xgb_m <- xgb.cv(   params               = param,
                  data = as.matrix(combinedDf %>% select(-Class)) ,
                  label =  combinedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = T, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 7/14)

# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
xgb_m$evaluation_log[xgb_m$best_iteration,]
NA
# Prediction
combinedDf$clsPred <- round(xgb_m$pred)

computePerformanceResults <- function(sdat){
  sdat = sdat[complete.cases(sdat),]
  acc = sum(sdat[,1] == sdat[,2])/nrow(sdat)
  conf_mat = table(sdat)
  specif = conf_mat[1,1]/sum(conf_mat[,1])
  sensiv = conf_mat[2,2]/sum(conf_mat[,2])
  preci =  conf_mat[2,2]/sum(conf_mat[2,])
  npv =    conf_mat[1,1]/sum(conf_mat[1,])
  return(c(acc,specif,sensiv,preci,npv))
}

# Get average performance
performance <- computePerformanceResults(combinedDf %>% select(Class, clsPred))
acc <- performance[1]
prec <- performance[4]
recall <- performance[3]
spec <- performance[2]
npv <- performance[5]
f1 <- (2 * recall * prec) / (recall + prec)
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])

print(paste("Accuracy=", round(acc, 2)))
[1] "Accuracy= 0.9"
print(paste("Precision=", round(prec, 2)))
[1] "Precision= 0.92"
print(paste("Recall=", round(recall, 2)))
[1] "Recall= 0.92"
print(paste("Specificity=", round(spec, 2)))
[1] "Specificity= 0.88"
print(paste("NPV=", round(npv, 2)))
[1] "NPV= 0.88"
print(paste("F1=", round(f1, 2)))
[1] "F1= 0.92"
print(paste("AUC=", round(auc, 2)))
[1] "AUC= 0.91"
# Importance
bst <- xgboost(   params               = param,
                  data = as.matrix(combinedDf %>% select(-Class)) ,
                  label =  combinedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = T, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 1)
importanceDf <- xgb.importance(colnames(combinedDf), model = bst)
print(importanceDf)
library(pROC)

dfROC <- pROC::roc(response = ifelse(combinedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<")

# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter 

plot(pROC::roc(response = ifelse(combinedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<"), 
     legacy.axes = TRUE,
     main="ROC Curve", 
     lwd=1.5) 

Important features

# Eleminate #5 who has an exceptional data to find a better threshold
stdPP3 <- sort(importantFeaturesDf$Std_PP_3, decreasing = T)[2:length(importantFeaturesDf$Std_PP_3)]
stdPP3Array <- matrix(stdPP3 ,nrow = 1,ncol = length(stdPP3))
  
maxStdPP3 <- sort(importantFeaturesDf$Std_PP_3, decreasing = T)[2]
PP_DEV_3_THRESHOLD <- otsu(stdPP3Array, range=c(min(stdPP3), maxStdPP3)) # Expected Threshold = 0.088
print(paste0('Threshold: ', PP_DEV_3_THRESHOLD))
[1] "Threshold: 0.0881111526518663"
importantFeaturesDf$PP_Dev_3_Group <- ifelse(importantFeaturesDf$Std_PP_3 > PP_DEV_3_THRESHOLD, 1, 0)
write.csv(importantFeaturesDf, "../outputs/importantFeatures.csv")

Venn diagram

library(VennDiagram)
library(RColorBrewer)
 
M_Low <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_3_Group==0,])
M_High <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_3_Group==1,])

F_Low <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_Group==0,])
F_High <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_Group==1,])

jpeg("../plots/venn/venn_All.png", res=150, width=900)
venn.plot <- venn.diagram(
  list(M_Low, F_Low, M_High, F_High), NULL, 
  fill=c("blue", "blue", "red", "red"), 
  alpha=c(0.5,0.5,0.5,0.5), 
  resolution = 150,
  cex = 1, 
  cat.fontface=1, 
  category.names=c("Drive=M\n SD=Low\n", "Drive=F\n Arousal=Low\n", "Drive=M\n SD=High\n", "Drive=F\n Arousal=High\n")
)
grid.draw(venn.plot)
dev.off()
null device 
          1 
# 
# jpeg("../plots/venn/venn_High.png", res=150, width=700)
# venn.plot <- venn.diagram(
#   list(M_High, F_High), NULL, 
#   fill=c("pink", "red"), 
#   alpha=c(0.5,0.5), 
#   resolution = 150,
#   cex = 1, 
#   cat.fontface=1, 
#   category.names=c("Drive=M", "Drive=F")
# )
# grid.draw(venn.plot)
# dev.off()

Plot feature importance

yAxis <- list(
  title = 'Importance',
  range=c(0.0, 1.0)
)
xAxis <- list(
  title = ''
)
importanceDf$FeatureName <- lapply(importanceDf$Feature, function(x) {
  ifelse(x=="Std_PP_3", "SD of Arousal\n in Drive M", 
         ifelse(x=="PP_Dev_2_Turning", "Arousal in Drive C\nat turning segments", 
            ifelse(x=="Activity_C", "Prior stressor\n is Cognitive", x)))
})

fig_Importance <- plot_ly(importanceDf, x = ~FeatureName, y = ~Gain, type = 'bar', name = 'Gain', width=600) %>%
  add_trace(y = ~Cover, name = 'Cover') %>% 
  add_trace(y = ~Frequency, name = 'Frequency') %>% 
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', title="Feature Importance") %>% 
  config(.Last.value, mathjax = 'cdn')

htmltools::tagList(fig_Importance)

Feature

classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, x = ~Std_PP_3, y = ~PP_Dev, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive \n Hyphenated line indicates discriminative boundary"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1, color="green"))
  ))
htmltools::tagList(figStdVsDev)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
'layout' objects don't have these attributes: 'showscale'
Valid attributes include:
'font', 'title', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
'layout' objects don't have these attributes: 'showscale'
Valid attributes include:
'font', 'title', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, x = ~abs(PP_Dev_2_Turning), y = ~PP_Dev, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1))
  ))
htmltools::tagList(figStdVsDev)
classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, y = ~abs(PP_Dev_2_Turning), x = ~Std_PP_3, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1))
  ))
htmltools::tagList(figStdVsDev)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
'layout' objects don't have these attributes: 'showscale'
Valid attributes include:
'font', 'title', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
'layout' objects don't have these attributes: 'showscale'
Valid attributes include:
'font', 'title', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
---
title: "R Notebook"
output: html_notebook
---

```{r}
source('../settings/settings.R')
source('commonFunctions.R')
```

```{r}
drive1 <- read.csv('../data/processed/analysis/TT1_Drive_1_PP.csv')
drive2 <- read.csv('../data/processed/Analysis/TT1_Drive_2_PP.csv')
drive3 <- read.csv('../data/processed/Analysis/TT1_Drive_3_PP.csv')
drive4 <- read.csv('../data/processed/Analysis/TT1_Drive_4_PP.csv', stringsAsFactors = T)
```

```{r}
set.seed(43)
combinedDf <- cbind(drive4, 
                    drive1$MeanPP_Seg0, 
                    drive2$MeanPP, drive3$MeanPP,
                    drive2$MeanPP_SegMax, drive3$MeanPP_SegMax, 
                    drive2$MeanPP_Seg0, drive3$MeanPP_Seg0,
                    drive2$StdPP, drive3$StdPP 
                  )
names(combinedDf) <- c(names(drive4), 
                       "PP_Dev_1_Turning",
                       "PP_Dev_2", "PP_Dev_3", 
                       "PP_Dev_2_Straight", "PP_Dev_3_Straight", 
                       "PP_Dev_2_Turning", "PP_Dev_3_Turning", 
                       "Std_PP_2", "Std_PP_3")

combinedDf$Subject <- paste0("#", str_pad(combinedDf$Subject, 2, pad="0"))
```

```{r}
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO",]
combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C",]
combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M",]

combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
```

```{r}
COLOR_NORMAL <- list(color='rgb(120,120,120)')
COLOR_COGNITIVE <- list(color='rgb(158,202,225)')
COLOR_MOTORIC <- list(color='rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color='rgb(158,202,225)')
COLOR_FAILURE <- list(color='red')

yAxis <- list(
  title = 'Perinasal Perspiration (Log)',
  range=c(-0.3, 0.5)
)

# Apply Otsu algorithm to select threshold
ppDev <- combinedDf$PP_Dev
ppDevArray <- matrix(ppDev ,nrow = 1,ncol = length(ppDev))
  
THRESHOLD_MILD = otsu(ppDevArray, range=c(min(ppDev), max(ppDev))) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))

MARKER_LINE_MILD = list(color="blue")
MARKER_LINE_EXTREME = list(color="red")
```

```{r, warning=F}
fig_NoStressor <- plot_ly(combinedDf_NoStressor, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Failure - PP Deviation', marker=COLOR_FAILURE) %>% 
  add_segments(x="#01", xend="#41", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold: Mild Change of PP",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="No Stressor")

htmltools::tagList(fig_NoStressor)
```

```{r, warning=F}
fig_Cognitive <- plot_ly(combinedDf_Cognitive, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Failure - PP Deviation', marker=COLOR_FAILURE) %>% 
  add_segments(x="#02", xend="#22", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold: Mild Change of PP",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#02", xend="#22", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="Stressor = Cognitive")

htmltools::tagList(fig_Cognitive)
```



```{r, warning=F}
fig_Motoric <- plot_ly(combinedDf_Motoric, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar', name = 'Arousal in Drive C - Straight segment', marker=COLOR_COGNITIVE, width=870) %>%
  add_trace(y = ~PP_Dev_3_Straight, name = 'Arousal in Drive M - Straight segment', marker=COLOR_MOTORIC) %>% 
  add_trace(y = ~PP_Dev_2_Turning, name = 'Arousal in Drive C - Turning segment', marker=COLOR_COGNITIVE) %>% 
  add_trace(y = ~PP_Dev_3_Turning, name = 'Arousal in Drive M - Turning segment', marker=COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Prior, name = 'Arousal in Drive F - Under prior stressor', marker=COLOR_FAILURE_PRIOR) %>% 
  add_trace(y = ~PP_Dev, name = 'Arousal in Drive F - Unintended acceleration', marker=COLOR_FAILURE) %>% 
  add_segments(x="#05", xend="#31", y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Threshold",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#05", xend="#31", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title="Stressor = Motoric")

htmltools::tagList(fig_Motoric)
```


```{r}
library(nlme)
combinedDf$Subject = as.factor(combinedDf$Subject)
combinedDf$Activity = as.factor(combinedDf$Activity)
combinedDf$PP_Dev_Group = ifelse(combinedDf$PP_Dev > THRESHOLD_MILD, 1, 0)
```

### Extract data for important features
```{r}
importantFeaturesDf <- combinedDf %>% select(Subject, Std_PP_3, PP_Dev_2_Turning, Activity, PP_Dev, PP_Dev_Group)
```

```{r}
linearModel1 <- lm(PP_Dev ~ 
              + abs(PP_Dev_2_Straight)
              + abs(PP_Dev_3_Straight)
              + abs(PP_Dev_2_Turning) 
              + abs(PP_Dev_3_Turning)
              # + Std_PP_2
              + Std_PP_3
              + abs(PP_Prior)
              + factor(Activity), 
            data=combinedDf)

# anova(model)
summary(linearModel1)
plot(linearModel1)
```

```{r}
linearModel2 <- lme(PP_Dev ~ 
              abs(PP_Dev_2_Straight)
              + abs(PP_Dev_3_Straight)
              + abs(PP_Dev_2_Turning) 
              + abs(PP_Dev_3_Turning)
              + Std_PP_3
              + abs(PP_Prior)
              + factor(Activity), 
            random=~1|Subject,
            data=combinedDf,
            method="REML")

# anova(linearModel2)
summary(linearModel2)
plot(linearModel2)
```

# Linear Model from Drive C
```{r}
linearModelC <- lm(PP_Dev ~
              abs(PP_Dev_2)
              + Std_PP_2
              + factor(Activity),
            data=combinedDf)

# anova(model)
summary(linearModelC)
plot(linearModelC)
```

```{r}
linearModelC_Segments <- lm(PP_Dev ~ 
                abs(PP_Dev_2_Straight)
              + abs(PP_Dev_2_Turning)
              + Std_PP_2,
            data=combinedDf)

# anova(model)
summary(linearModelC_Segments)
plot(linearModelC_Segments)
```

# Linear Model from Drive M
```{r}
combinedDfNoOutlier <- combinedDf[combinedDf$Subject != "#05",]
linearModelM <- lm(PP_Dev ~ 
              PP_Dev_3
              + Std_PP_3,
            data=combinedDfNoOutlier)

# anova(model)
summary(linearModelM)
plot(linearModelM)
```

```{r}
# Export the anova table
library(xtable)
lmCoeffs <- summary(linearModel1)$coefficients
lmAnova <- anova(linearModel1)

print(xtable(lmCoeffs, digits=c(0,5,5,5,5)))
print(xtable(lmAnova), digits=c(0,5,5,5,5))

```


```{r}
combinedDf$PP_Dev <- NULL

combinedDf$Subject <- NULL
combinedDf$Activity_NO <- ifelse(combinedDf$Activity == "NO", 1, 0)
combinedDf$Activity_C <- ifelse(combinedDf$Activity == "C", 1, 0)
combinedDf$Activity_M <- ifelse(combinedDf$Activity == "M", 1, 0)
combinedDf$Activity <- NULL
combinedDf$PP_Dev_1_Turning <- NULL

# According to Linear model
combinedDf$PP_Dev_3_Straight <- abs(combinedDf$PP_Dev_3_Straight)
combinedDf$PP_Dev_2_Turning <- abs(combinedDf$PP_Dev_2_Turning)
combinedDf$PP_Dev_3_Turning <- abs(combinedDf$PP_Dev_3_Turning)
combinedDf$PP_Prior <- abs(combinedDf$PP_Prior) # NULL

combinedDf$Class <- ifelse(combinedDf$PP_Dev_Group == 1, T, F)
combinedDf$PP_Dev_Group <- NULL

print(names(combinedDf))
```

```{r}
# library(mefa)
# combinedDf <- rep(combinedDf, 10) 
```

```{r}
set.seed(39)
n_folds <- 3
params <- param <- list(objective       = "binary:logistic", 
               booster          = "gbtree",
               eval_metric      = "auc",
               eta              = 0.1,
               max_depth        = 10,
               alpha            = 1,
               lambda           = 0,
               gamma            = 0.45,
               min_child_weight = 0.3,
               subsample        = 1,
               colsample_bytree = 1)
           
# XGBoost Model         
xgb_m <- xgb.cv(   params               = param,
                  data = as.matrix(combinedDf %>% select(-Class)) ,
                  label =  combinedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = T, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 7/14)

# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
xgb_m$evaluation_log[xgb_m$best_iteration,]

```
```{r}
# Prediction
combinedDf$clsPred <- round(xgb_m$pred)

computePerformanceResults <- function(sdat){
  sdat = sdat[complete.cases(sdat),]
  acc = sum(sdat[,1] == sdat[,2])/nrow(sdat)
  conf_mat = table(sdat)
  specif = conf_mat[1,1]/sum(conf_mat[,1])
  sensiv = conf_mat[2,2]/sum(conf_mat[,2])
  preci =  conf_mat[2,2]/sum(conf_mat[2,])
  npv =    conf_mat[1,1]/sum(conf_mat[1,])
  return(c(acc,specif,sensiv,preci,npv))
}

# Get average performance
performance <- computePerformanceResults(combinedDf %>% select(Class, clsPred))
acc <- performance[1]
prec <- performance[4]
recall <- performance[3]
spec <- performance[2]
npv <- performance[5]
f1 <- (2 * recall * prec) / (recall + prec)
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])

print(paste("Accuracy=", round(acc, 2)))
print(paste("Precision=", round(prec, 2)))
print(paste("Recall=", round(recall, 2)))
print(paste("Specificity=", round(spec, 2)))
print(paste("NPV=", round(npv, 2)))
print(paste("F1=", round(f1, 2)))
print(paste("AUC=", round(auc, 2)))
```

```{r}
# Importance
bst <- xgboost(   params               = param,
                  data = as.matrix(combinedDf %>% select(-Class)) ,
                  label =  combinedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = T, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 1)
importanceDf <- xgb.importance(colnames(combinedDf), model = bst)
print(importanceDf)
```

```{r}
library(pROC)

dfROC <- pROC::roc(response = ifelse(combinedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<")

# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter 

plot(pROC::roc(response = ifelse(combinedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<"), 
     legacy.axes = TRUE,
     main="ROC Curve", 
     lwd=1.5) 
```

# Important features
```{r}
# Eleminate #5 who has an exceptional data to find a better threshold
stdPP3 <- sort(importantFeaturesDf$Std_PP_3, decreasing = T)[2:length(importantFeaturesDf$Std_PP_3)]
stdPP3Array <- matrix(stdPP3 ,nrow = 1,ncol = length(stdPP3))
  
maxStdPP3 <- sort(importantFeaturesDf$Std_PP_3, decreasing = T)[2]
PP_DEV_3_THRESHOLD <- otsu(stdPP3Array, range=c(min(stdPP3), maxStdPP3)) # Expected Threshold = 0.088
print(paste0('Threshold: ', PP_DEV_3_THRESHOLD))

importantFeaturesDf$PP_Dev_3_Group <- ifelse(importantFeaturesDf$Std_PP_3 > PP_DEV_3_THRESHOLD, 1, 0)
write.csv(importantFeaturesDf, "../outputs/importantFeatures.csv")
```

# Venn diagram
```{r}
library(VennDiagram)
library(RColorBrewer)
 
M_Low <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_3_Group==0,])
M_High <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_3_Group==1,])

F_Low <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_Group==0,])
F_High <- rownames(importantFeaturesDf[importantFeaturesDf$PP_Dev_Group==1,])

jpeg("../plots/venn/venn_All.png", res=150, width=900)
venn.plot <- venn.diagram(
  list(M_Low, F_Low, M_High, F_High), NULL, 
  fill=c("blue", "blue", "red", "red"), 
  alpha=c(0.5,0.5,0.5,0.5), 
  resolution = 150,
  cex = 1, 
  cat.fontface=1, 
  category.names=c("Drive=M\n SD=Low\n", "Drive=F\n Arousal=Low\n", "Drive=M\n SD=High\n", "Drive=F\n Arousal=High\n")
)
grid.draw(venn.plot)
dev.off()
# 
# jpeg("../plots/venn/venn_High.png", res=150, width=700)
# venn.plot <- venn.diagram(
#   list(M_High, F_High), NULL, 
#   fill=c("pink", "red"), 
#   alpha=c(0.5,0.5), 
#   resolution = 150,
#   cex = 1, 
#   cat.fontface=1, 
#   category.names=c("Drive=M", "Drive=F")
# )
# grid.draw(venn.plot)
# dev.off()
```

### Plot feature importance
```{r}
yAxis <- list(
  title = 'Importance',
  range=c(0.0, 1.0)
)
xAxis <- list(
  title = ''
)
importanceDf$FeatureName <- lapply(importanceDf$Feature, function(x) {
  ifelse(x=="Std_PP_3", "SD of Arousal\n in Drive M", 
         ifelse(x=="PP_Dev_2_Turning", "Arousal in Drive C\nat turning segments", 
            ifelse(x=="Activity_C", "Prior stressor\n is Cognitive", x)))
})

fig_Importance <- plot_ly(importanceDf, x = ~FeatureName, y = ~Gain, type = 'bar', name = 'Gain', width=600) %>%
  add_trace(y = ~Cover, name = 'Cover') %>% 
  add_trace(y = ~Frequency, name = 'Frequency') %>% 
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', title="Feature Importance") %>% 
  config(.Last.value, mathjax = 'cdn')

htmltools::tagList(fig_Importance)
```

### Feature
```{r}
classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, x = ~Std_PP_3, y = ~PP_Dev, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive \n Hyphenated line indicates discriminative boundary"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1, color="green"))
  ))
htmltools::tagList(figStdVsDev)
```

```{r}
classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, x = ~abs(PP_Dev_2_Turning), y = ~PP_Dev, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1))
  ))
htmltools::tagList(figStdVsDev)
```

```{r}
classColors <- c("blue", "red")
figStdVsDev <- plot_ly(data = importantFeaturesDf, y = ~abs(PP_Dev_2_Turning), x = ~Std_PP_3, 
                       color=~factor(PP_Dev_Group), colors=classColors,
                       marker=list(text="X")) %>%
  layout(xaxis=list(title="SD of Arousal in Motoric Drive"), yaxis=list(title="Arousal at catastrophic event"), showscale=F) %>%
  layout(shapes=list(
    list(x0=0.088, x1=0.088, y0=-0.1, y1=0.25, line=list(dash="dot", width=1))
  ))
htmltools::tagList(figStdVsDev)
```


